####################################################################################################################
# Script: Anticipating the citizenship premium: before and after effects of immigrant naturalisation on employment #
# Floris Peters, Maarten Vink & Hans Schmeets                                                                      #
# Journal of Ethnic and Migration Studies                                                                          #
####################################################################################################################


################
# Introduction #
################

# This file provides the syntax used to create all the tables and figures in the paper. 
# The dataset contains sensitive, micro level information. As such, for privacy reasons the data is only available to individuals employed at or affiliated to Statistics Netherlands. 
# The dataset can be found at the following location on the network of Statistics Netherlands: \\cbsp.nl\Productie\Projecten\SAL\209253UM_FP_SEC1\Werk\Floris\PhD\JEMS_employment 

#######################################################################################################################
#######################################################################################################################

#############
# Variables #
#############

# ID
# (Individual identification number)
 
# EMPLOYMENT
# (Employment status)
# [0] Not employed; 
# [1] Employed

# NATURALIZATION
# (Moment of naturalization, categorical)
# [1] No naturalization;
# [2] >3 years prior to naturalization;
# [3] 3 years prior to naturalization; 
# [4] 2 years prior to naturalization;
# [5] 1 year prior to naturalization;
# [6] Year of naturalization;
# [7] 1 year after naturalization;
# [8] 2 years after naturalization;
# [9] 3 years after naturalization;
# [10] >3 years after naturalization

# YSN
# (Years since naturalization)

# IMMIGRATIONYEAR
# (Year of first immigration to the Netherlands)

# GENDER
# [1] Male; 
# [2] Female

# AGEARRIVAL
# (Age at the moment of migration)

# AGECAT 
# (Age at the moment of migration in categories) 
# [1] 20-24 years; 
# [2] 25-29 years; 
# [3] 30-34 years; 
# [4] 35-39 years; 
# [5] 40-44 years; 
# [6] 45-50 years

# RESIDENCE
# (Years since migration)

# RESIDENCECAT
# (Years since migration in categories)
# [1] 0-1 years; 
# [2] 2-3 years; 
# [3] 4-5 years; 
# [4] 6-7 years; 
# [5] 8-9 years

# PARTNER
# [1] No partner; 
# [2] Foreign born foreign partner; 
# [3] Foreign born Dutch partner; 
# [4] Native Dutch partner

# CHILD
# (Children in the household in categories)
# [0] no children <18 in household; 
# [1] Children <18 in household

# DEVELOPMENT
# (Human Development Index (HDI) score origin country)

# DEVCAT
# (Human Development index (HDI) score origin country in categories)
# [1] First quartile; 
# [2] Second quartile; 
# [3] Third quartile; 
# [4] Fourth quartile

# EU
# [0] Not EU country of origin; 
# [1] EU country of origin
  
# NATURALIZATIONSPEED
# (Speed of naturalization)
# [0] No naturalization;
# [1] 1-3 years;
# [2] 4 years;
# [3] 5 years;
# [4] 6 years;
# [5] 7 years;
# [6] 8-10 years
  
# TRUNCATION
# (Does the individual drop out of the dataset prematurely?)
# [0] Individual remains in the dataset for the entire observation period; 
# [1] Individual does not remain in the dataset for the entire observation period
  
# DISTANCE
# (Geographical distance between the origin country and the Netherlands)
  
#######################################################################################################################
#######################################################################################################################


# Load the required libraries and read the main dataset.
# lme4 provides glmer(); optimx supplies the optimizer selected through
# glmerControl(optimizer = "optimx") in the models below; dplyr provides the
# pipe and lag() used for Table A5.
# NOTE(review): the original comment names "Data_cit_empl_main.sav", but the
# file is read with read.csv() and a ";" separator, so the file picked in the
# dialog must be a semicolon-separated text export - confirm the file name.
library(Matrix)
library(lme4)
library(optimx)
library(dplyr)
# file.choose() opens an interactive file dialog; TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
dataset_main <- read.csv(file.choose(), header = TRUE, sep = ";")


###########     
# Table 1 #
###########  
  
# Table 1, men: random-intercept logistic regression of employment on the
# naturalisation terms, years since migration and household covariates.

#select male immigrants
dataset_male <- subset(dataset_main, GENDER == 1)

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_male$NATIOEVER <- NA
dataset_male$NATIOEVER[dataset_male$NATURALIZATION == 1] <- 0
dataset_male$NATIOEVER[dataset_male$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_male$NATURALIZED <- NA
dataset_male$NATURALIZED[dataset_male$NATURALIZATION > 5] <- 1
dataset_male$NATURALIZED[dataset_male$NATURALIZATION < 6] <- 0

#mean-center years since migration
# Fix: this column previously held the sample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_male$RESIDENCECENTER <- dataset_male$RESIDENCE -
  mean(dataset_male$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_male$RESIDENCENATIO <- dataset_male$NATIOEVER * dataset_male$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_male$NATU_YSN <- dataset_male$NATURALIZED * dataset_male$YSN

#table 1 regression: male immigrants
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_1_male <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                        as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                      data = dataset_male, family = binomial("logit"),
                      control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

# Table 1, women: random-intercept logistic regression of employment on the
# naturalisation terms, years since migration and household covariates.

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_female$NATIOEVER <- NA
dataset_female$NATIOEVER[dataset_female$NATURALIZATION == 1] <- 0
dataset_female$NATIOEVER[dataset_female$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_female$NATURALIZED <- NA
dataset_female$NATURALIZED[dataset_female$NATURALIZATION > 5] <- 1
dataset_female$NATURALIZED[dataset_female$NATURALIZATION < 6] <- 0

#mean-center years since migration
# Fix: this column previously held the sample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_female$RESIDENCECENTER <- dataset_female$RESIDENCE -
  mean(dataset_female$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_female$RESIDENCENATIO <- dataset_female$NATIOEVER * dataset_female$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_female$NATU_YSN <- dataset_female$NATURALIZED * dataset_female$YSN

#table 1 regression: female immigrants
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_1_female <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                          as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                        data = dataset_female, family = binomial("logit"),
                        control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))


###########     
# Table 2 #
###########   

# Table 2, men from less developed origin countries: same model as Table 1,
# estimated on the lower half of DEVELOPMENTREC.

#select male immigrants
dataset_male <- subset(dataset_main, GENDER == 1)

#select immigrants from less developed countries
# (DEVELOPMENTREC at or below its median over all rows of dataset_male)
dataset_male_low <- subset(dataset_male, DEVELOPMENTREC <= median(DEVELOPMENTREC))

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_male_low$NATIOEVER <- NA
dataset_male_low$NATIOEVER[dataset_male_low$NATURALIZATION == 1] <- 0
dataset_male_low$NATIOEVER[dataset_male_low$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_male_low$NATURALIZED <- NA
dataset_male_low$NATURALIZED[dataset_male_low$NATURALIZATION > 5] <- 1
dataset_male_low$NATURALIZED[dataset_male_low$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_male_low$RESIDENCECENTER <- dataset_male_low$RESIDENCE -
  mean(dataset_male_low$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_male_low$RESIDENCENATIO <- dataset_male_low$NATIOEVER * dataset_male_low$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_male_low$NATU_YSN <- dataset_male_low$NATURALIZED * dataset_male_low$YSN

#table 2 regression: male immigrants; low development
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_2_male_low <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                            as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                          data = dataset_male_low, family = binomial("logit"),
                          control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

# Table 2, men from more developed origin countries: same model as Table 1,
# estimated on the upper half of DEVELOPMENTREC.

#select male immigrants
dataset_male <- subset(dataset_main, GENDER == 1)

#select immigrants from high developed countries
# (DEVELOPMENTREC above its median over all rows of dataset_male)
dataset_male_high <- subset(dataset_male, DEVELOPMENTREC > median(DEVELOPMENTREC))

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_male_high$NATIOEVER <- NA
dataset_male_high$NATIOEVER[dataset_male_high$NATURALIZATION == 1] <- 0
dataset_male_high$NATIOEVER[dataset_male_high$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_male_high$NATURALIZED <- NA
dataset_male_high$NATURALIZED[dataset_male_high$NATURALIZATION > 5] <- 1
dataset_male_high$NATURALIZED[dataset_male_high$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_male_high$RESIDENCECENTER <- dataset_male_high$RESIDENCE -
  mean(dataset_male_high$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_male_high$RESIDENCENATIO <- dataset_male_high$NATIOEVER * dataset_male_high$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_male_high$NATU_YSN <- dataset_male_high$NATURALIZED * dataset_male_high$YSN

#table 2 regression: male immigrants; high development
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_2_male_high <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                             as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                           data = dataset_male_high, family = binomial("logit"),
                           control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

# Table 2, women from less developed origin countries: same model as Table 1,
# estimated on the lower half of DEVELOPMENTREC.

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#select immigrants from less developed countries
# (DEVELOPMENTREC at or below its median over all rows of dataset_female)
dataset_female_low <- subset(dataset_female, DEVELOPMENTREC <= median(DEVELOPMENTREC))

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_female_low$NATIOEVER <- NA
dataset_female_low$NATIOEVER[dataset_female_low$NATURALIZATION == 1] <- 0
dataset_female_low$NATIOEVER[dataset_female_low$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_female_low$NATURALIZED <- NA
dataset_female_low$NATURALIZED[dataset_female_low$NATURALIZATION > 5] <- 1
dataset_female_low$NATURALIZED[dataset_female_low$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_female_low$RESIDENCECENTER <- dataset_female_low$RESIDENCE -
  mean(dataset_female_low$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_female_low$RESIDENCENATIO <- dataset_female_low$NATIOEVER * dataset_female_low$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_female_low$NATU_YSN <- dataset_female_low$NATURALIZED * dataset_female_low$YSN

#table 2 regression: female immigrants; low development
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_2_female_low <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                              as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                            data = dataset_female_low, family = binomial("logit"),
                            control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

# Table 2, women from more developed origin countries: same model as Table 1,
# estimated on the upper half of DEVELOPMENTREC.

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#select immigrants from high developed countries
# (DEVELOPMENTREC above its median over all rows of dataset_female)
dataset_female_high <- subset(dataset_female, DEVELOPMENTREC > median(DEVELOPMENTREC))

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_female_high$NATIOEVER <- NA
dataset_female_high$NATIOEVER[dataset_female_high$NATURALIZATION == 1] <- 0
dataset_female_high$NATIOEVER[dataset_female_high$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_female_high$NATURALIZED <- NA
dataset_female_high$NATURALIZED[dataset_female_high$NATURALIZATION > 5] <- 1
dataset_female_high$NATURALIZED[dataset_female_high$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_female_high$RESIDENCECENTER <- dataset_female_high$RESIDENCE -
  mean(dataset_female_high$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_female_high$RESIDENCENATIO <- dataset_female_high$NATIOEVER * dataset_female_high$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_female_high$NATU_YSN <- dataset_female_high$NATURALIZED * dataset_female_high$YSN

#table 2 regression: female immigrants; high development
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_2_female_high <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                               as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                             data = dataset_female_high, family = binomial("logit"),
                             control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))


###########     
# Table 3 #
###########  

# Table 3, men from non-EU origin countries: same model as Table 1,
# estimated on the EU == 0 subsample.

#select male immigrants
dataset_male <- subset(dataset_main, GENDER == 1)

#select immigrants from non-EU countries
dataset_male_non_EU <- subset(dataset_male, EU == 0)

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_male_non_EU$NATIOEVER <- NA
dataset_male_non_EU$NATIOEVER[dataset_male_non_EU$NATURALIZATION == 1] <- 0
dataset_male_non_EU$NATIOEVER[dataset_male_non_EU$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_male_non_EU$NATURALIZED <- NA
dataset_male_non_EU$NATURALIZED[dataset_male_non_EU$NATURALIZATION > 5] <- 1
dataset_male_non_EU$NATURALIZED[dataset_male_non_EU$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_male_non_EU$RESIDENCECENTER <- dataset_male_non_EU$RESIDENCE -
  mean(dataset_male_non_EU$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_male_non_EU$RESIDENCENATIO <- dataset_male_non_EU$NATIOEVER * dataset_male_non_EU$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_male_non_EU$NATU_YSN <- dataset_male_non_EU$NATURALIZED * dataset_male_non_EU$YSN

#table 3 regression: male immigrants; non-EU
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_3_male_non_EU <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                               as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                             data = dataset_male_non_EU, family = binomial("logit"),
                             control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

# Table 3, men from EU origin countries: same model as Table 1,
# estimated on the EU == 1 subsample.

#select male immigrants
dataset_male <- subset(dataset_main, GENDER == 1)

#select immigrants from EU countries
dataset_male_EU <- subset(dataset_male, EU == 1)

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_male_EU$NATIOEVER <- NA
dataset_male_EU$NATIOEVER[dataset_male_EU$NATURALIZATION == 1] <- 0
dataset_male_EU$NATIOEVER[dataset_male_EU$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_male_EU$NATURALIZED <- NA
dataset_male_EU$NATURALIZED[dataset_male_EU$NATURALIZATION > 5] <- 1
dataset_male_EU$NATURALIZED[dataset_male_EU$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_male_EU$RESIDENCECENTER <- dataset_male_EU$RESIDENCE -
  mean(dataset_male_EU$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_male_EU$RESIDENCENATIO <- dataset_male_EU$NATIOEVER * dataset_male_EU$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_male_EU$NATU_YSN <- dataset_male_EU$NATURALIZED * dataset_male_EU$YSN

#table 3 regression: male immigrants; EU
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_3_male_EU <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                           as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                         data = dataset_male_EU, family = binomial("logit"),
                         control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))
 
# Table 3, women from non-EU origin countries: same model as Table 1,
# estimated on the EU == 0 subsample.

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#select immigrants from non-EU countries
dataset_female_non_EU <- subset(dataset_female, EU == 0)

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_female_non_EU$NATIOEVER <- NA
dataset_female_non_EU$NATIOEVER[dataset_female_non_EU$NATURALIZATION == 1] <- 0
dataset_female_non_EU$NATIOEVER[dataset_female_non_EU$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_female_non_EU$NATURALIZED <- NA
dataset_female_non_EU$NATURALIZED[dataset_female_non_EU$NATURALIZATION > 5] <- 1
dataset_female_non_EU$NATURALIZED[dataset_female_non_EU$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_female_non_EU$RESIDENCECENTER <- dataset_female_non_EU$RESIDENCE -
  mean(dataset_female_non_EU$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_female_non_EU$RESIDENCENATIO <- dataset_female_non_EU$NATIOEVER * dataset_female_non_EU$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_female_non_EU$NATU_YSN <- dataset_female_non_EU$NATURALIZED * dataset_female_non_EU$YSN

#table 3 regression: female immigrants; non-EU
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_3_female_non_EU <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                                 as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                               data = dataset_female_non_EU, family = binomial("logit"),
                               control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

# Table 3, women from EU origin countries: same model as Table 1,
# estimated on the EU == 1 subsample.

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#select immigrants from EU countries
dataset_female_EU <- subset(dataset_female, EU == 1)

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_female_EU$NATIOEVER <- NA
dataset_female_EU$NATIOEVER[dataset_female_EU$NATURALIZATION == 1] <- 0
dataset_female_EU$NATIOEVER[dataset_female_EU$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_female_EU$NATURALIZED <- NA
dataset_female_EU$NATURALIZED[dataset_female_EU$NATURALIZATION > 5] <- 1
dataset_female_EU$NATURALIZED[dataset_female_EU$NATURALIZATION < 6] <- 0

#mean-center years since migration within this subsample
# Fix: this column previously held the subsample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_female_EU$RESIDENCECENTER <- dataset_female_EU$RESIDENCE -
  mean(dataset_female_EU$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_female_EU$RESIDENCENATIO <- dataset_female_EU$NATIOEVER * dataset_female_EU$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_female_EU$NATU_YSN <- dataset_female_EU$NATURALIZED * dataset_female_EU$YSN

#table 3 regression: female immigrants; EU
# (random intercept per ID; fitted with the optimx "nlminb" method)
table_3_female_EU <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE +
                             as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)),
                           data = dataset_female_EU, family = binomial("logit"),
                           control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

# Figures 2-4 are based on the coefficients of Tables A2-A4 (see below)
  
  
############     
# Table A1 #
############  

# Table A1, men: distribution of EMPLOYMENT within each covariate category.
# Every table has the covariate's categories in rows and the EMPLOYMENT codes
# in columns; margin = 1 turns the counts into proportions that sum to 1
# within each row.

# keep only rows with GENDER == 1 (rows where the comparison is NA are dropped)
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]

rate_naturalization_male <- prop.table(
  table(dataset_male[["NATURALIZATION"]], dataset_male[["EMPLOYMENT"]]),
  margin = 1
)
rate_age_male <- prop.table(
  table(dataset_male[["AGECAT"]], dataset_male[["EMPLOYMENT"]]),
  margin = 1
)
rate_residence_male <- prop.table(
  table(dataset_male[["RESIDENCECAT"]], dataset_male[["EMPLOYMENT"]]),
  margin = 1
)
rate_partner_male <- prop.table(
  table(dataset_male[["PARTNER"]], dataset_male[["EMPLOYMENT"]]),
  margin = 1
)
rate_child_male <- prop.table(
  table(dataset_male[["CHILD"]], dataset_male[["EMPLOYMENT"]]),
  margin = 1
)
rate_hdi_male <- prop.table(
  table(dataset_male[["DEVCAT"]], dataset_male[["EMPLOYMENT"]]),
  margin = 1
)
rate_eu_male <- prop.table(
  table(dataset_male[["EU"]], dataset_male[["EMPLOYMENT"]]),
  margin = 1
)

# Table A1, women: distribution of EMPLOYMENT within each covariate category.
# Every table has the covariate's categories in rows and the EMPLOYMENT codes
# in columns; margin = 1 turns the counts into proportions that sum to 1
# within each row.

# keep only rows with GENDER == 2 (rows where the comparison is NA are dropped)
dataset_female <- dataset_main[which(dataset_main$GENDER == 2), , drop = FALSE]

rate_naturalization_female <- prop.table(
  table(dataset_female[["NATURALIZATION"]], dataset_female[["EMPLOYMENT"]]),
  margin = 1
)
rate_age_female <- prop.table(
  table(dataset_female[["AGECAT"]], dataset_female[["EMPLOYMENT"]]),
  margin = 1
)
rate_residence_female <- prop.table(
  table(dataset_female[["RESIDENCECAT"]], dataset_female[["EMPLOYMENT"]]),
  margin = 1
)
rate_partner_female <- prop.table(
  table(dataset_female[["PARTNER"]], dataset_female[["EMPLOYMENT"]]),
  margin = 1
)
rate_child_female <- prop.table(
  table(dataset_female[["CHILD"]], dataset_female[["EMPLOYMENT"]]),
  margin = 1
)
rate_hdi_female <- prop.table(
  table(dataset_female[["DEVCAT"]], dataset_female[["EMPLOYMENT"]]),
  margin = 1
)
rate_eu_female <- prop.table(
  table(dataset_female[["EU"]], dataset_female[["EMPLOYMENT"]]),
  margin = 1
)
  

############     
# Table A2 #
############  

# Table A2, men: employment by moment of naturalisation (categorical), among
# men who naturalise at some point.

# men only (GENDER == 1)
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_male_nat <- dataset_male[which(dataset_male$NATURALIZATION > 1), , drop = FALSE]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A2_male <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_male_nat,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)

# Table A2, women: employment by moment of naturalisation (categorical), among
# women who naturalise at some point.

# women only (GENDER == 2)
dataset_female <- dataset_main[which(dataset_main$GENDER == 2), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_female_nat <- dataset_female[which(dataset_female$NATURALIZATION > 1), , drop = FALSE]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A2_female <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_female_nat,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)
  

#############     
# Table A3a #
############# 
  
# Table A3a, men who naturalise, less developed origin countries.

# men only (GENDER == 1)
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_male_nat <- dataset_male[which(dataset_male$NATURALIZATION > 1), , drop = FALSE]

# lower half: DEVELOPMENTREC at or below its median over dataset_male_nat
dataset_male_nat_low <- dataset_male_nat[
  which(dataset_male_nat$DEVELOPMENTREC <= median(dataset_male_nat$DEVELOPMENTREC)),
  ,
  drop = FALSE
]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A3a_male_low <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_male_nat_low,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)

# Table A3a, men who naturalise, more developed origin countries.

# men only (GENDER == 1)
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_male_nat <- dataset_male[which(dataset_male$NATURALIZATION > 1), , drop = FALSE]

# upper half: DEVELOPMENTREC above its median over dataset_male_nat
dataset_male_nat_high <- dataset_male_nat[
  which(dataset_male_nat$DEVELOPMENTREC > median(dataset_male_nat$DEVELOPMENTREC)),
  ,
  drop = FALSE
]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A3a_male_high <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_male_nat_high,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)


#############     
# Table A3b #
############# 

# Table A3b, women who naturalise, less developed origin countries.

# women only (GENDER == 2)
dataset_female <- dataset_main[which(dataset_main$GENDER == 2), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_female_nat <- dataset_female[which(dataset_female$NATURALIZATION > 1), , drop = FALSE]

# lower half: DEVELOPMENTREC at or below its median over dataset_female_nat
dataset_female_nat_low <- dataset_female_nat[
  which(dataset_female_nat$DEVELOPMENTREC <= median(dataset_female_nat$DEVELOPMENTREC)),
  ,
  drop = FALSE
]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A3b_female_low <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_female_nat_low,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)

# Table A3b, women who naturalise, more developed origin countries.

# women only (GENDER == 2)
dataset_female <- dataset_main[which(dataset_main$GENDER == 2), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_female_nat <- dataset_female[which(dataset_female$NATURALIZATION > 1), , drop = FALSE]

# upper half: DEVELOPMENTREC above its median over dataset_female_nat
dataset_female_nat_high <- dataset_female_nat[
  which(dataset_female_nat$DEVELOPMENTREC > median(dataset_female_nat$DEVELOPMENTREC)),
  ,
  drop = FALSE
]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A3b_female_high <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_female_nat_high,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)


#############   
# Table A4a #
#############

# Table A4a, men who naturalise, non-EU origin countries.

# men only (GENDER == 1)
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_male_nat <- dataset_male[which(dataset_male$NATURALIZATION > 1), , drop = FALSE]

# non-EU origin (EU == 0)
dataset_male_nat_non_EU <- dataset_male_nat[which(dataset_male_nat$EU == 0), , drop = FALSE]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A4a_male_non_EU <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_male_nat_non_EU,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)

# Table A4a, men who naturalise, EU origin countries.

# men only (GENDER == 1)
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_male_nat <- dataset_male[which(dataset_male$NATURALIZATION > 1), , drop = FALSE]

# EU origin (EU == 1)
dataset_male_nat_EU <- dataset_male_nat[which(dataset_male_nat$EU == 1), , drop = FALSE]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A4a_male_EU <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_male_nat_EU,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)


#############     
# Table A4b #
############# 

# Table A4b, women who naturalise, non-EU origin countries.

# women only (GENDER == 2)
dataset_female <- dataset_main[which(dataset_main$GENDER == 2), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_female_nat <- dataset_female[which(dataset_female$NATURALIZATION > 1), , drop = FALSE]

# non-EU origin (EU == 0)
dataset_female_nat_non_EU <- dataset_female_nat[which(dataset_female_nat$EU == 0), , drop = FALSE]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A4b_female_non_EU <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_female_nat_non_EU,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)

# Table A4b, women who naturalise, EU origin countries.

# women only (GENDER == 2)
dataset_female <- dataset_main[which(dataset_main$GENDER == 2), , drop = FALSE]

# drop NATURALIZATION code 1 ("no naturalization"); codes above 1 remain
dataset_female_nat <- dataset_female[which(dataset_female$NATURALIZATION > 1), , drop = FALSE]

# EU origin (EU == 1)
dataset_female_nat_EU <- dataset_female_nat[which(dataset_female_nat$EU == 1), , drop = FALSE]

# random-intercept logit (one intercept per ID), optimised with optimx/nlminb
table_A4b_female_EU <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + as.factor(PARTNER) +
    as.factor(CHILD) + (1 | as.factor(ID)),
  data = dataset_female_nat_EU,
  family = binomial(link = "logit"),
  control = glmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
)


############     
# Table A5 #
############

# Table A5, men: pooled logistic regression of employment on naturalisation
# speed and lagged employment, estimated on the dataset that also contains
# individuals without variation on the dependent variable.

#load dataset "Data_cit_empl_var-novar", which includes individuals without variation on the dependent variable
dataset_var_novar <- read.csv(file.choose(), header = TRUE, sep = ";")

#select male immigrants
# Fix: subset the dataset loaded just above. The original subset dataset_main,
# so the var/no-var file was read but never used.
dataset_var_novar_male <- subset(dataset_var_novar, GENDER == 1)

#sort by ID and YEAR so that every row directly follows the same individual's previous observation
dataset_var_novar_male <- dataset_var_novar_male[with(dataset_var_novar_male, order(ID, YEAR)), ]

#1-period lag employment variable
# Fix: dplyr::lag() takes the shift as `n`; the original passed `k = 1`, which
# is the argument name of stats::lag() and is not a dplyr::lag() parameter.
# NOTE(review): the lag is the previous row of the same individual; this is
# the previous calendar year only if each ID has one row per consecutive
# YEAR - confirm against the data.
dataset_var_novar_male$LAG_EMPLOYMENT <- dplyr::lag(dataset_var_novar_male$EMPLOYMENT, n = 1L)

#create dummy variable indicating first case per individual
# Fix: replaces the original first_row / cbind / merge sequence, which
# assigned a data frame into a column, called cbind(cbind, ...) and merged
# with an object (first_row) that was never created.
# With the data sorted by ID, the first row of each ID is the one that is not
# a duplicate of an earlier ID.
dataset_var_novar_male$first_case <- as.numeric(!duplicated(dataset_var_novar_male$ID))

#remove first case per individual
# (its lagged value was taken from the last row of the preceding individual)
dataset_var_novar_male <- subset(dataset_var_novar_male, first_case != 1)

#create variable 'ever naturalized'
# (NATURALIZATION code 1 = no naturalization; codes above 1 = naturalizes at some point)
dataset_var_novar_male$NATIOEVER <- NA
dataset_var_novar_male$NATIOEVER[dataset_var_novar_male$NATURALIZATION == 1] <- 0
dataset_var_novar_male$NATIOEVER[dataset_var_novar_male$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
# (codes 6-10 = year of naturalization or later; codes 1-5 = not (yet) naturalized)
dataset_var_novar_male$NATURALIZED <- NA
dataset_var_novar_male$NATURALIZED[dataset_var_novar_male$NATURALIZATION > 5] <- 1
dataset_var_novar_male$NATURALIZED[dataset_var_novar_male$NATURALIZATION < 6] <- 0

#mean-center years since migration
# Fix: this column previously held the sample mean itself (a constant), so
# RESIDENCENATIO was NATIOEVER times a constant instead of the interaction
# with mean-centered years since migration described below.
dataset_var_novar_male$RESIDENCECENTER <- dataset_var_novar_male$RESIDENCE -
  mean(dataset_var_novar_male$RESIDENCE, na.rm = TRUE)

#create variable 'ever naturalized' * (mean-centered) years since migration
dataset_var_novar_male$RESIDENCENATIO <- dataset_var_novar_male$NATIOEVER * dataset_var_novar_male$RESIDENCECENTER

#create variable 'naturalized during given observation' * years since naturalization (YSN)
dataset_var_novar_male$NATU_YSN <- dataset_var_novar_male$NATURALIZED * dataset_var_novar_male$YSN

#table A5 regression: male immigrants; logistic
# The category-coded covariates are entered as factors, as in the other models
# of this script; entering the codes as numbers would impose a linear effect
# across unordered categories such as PARTNER.
# NOTE(review): confirm that the published Table A5 treats NATURALIZATIONSPEED
# as categorical.
table_A5_male <- glm(EMPLOYMENT ~ as.factor(NATURALIZATIONSPEED) + AGEARRIVAL + RESIDENCENATIO + RESIDENCE +
                       as.factor(PARTNER) + as.factor(CHILD) + as.factor(EU) + LAG_EMPLOYMENT,
                     family = binomial(link = "logit"), data = dataset_var_novar_male)

#load dataset "Data_cit_empl_var-novar", which includes individuals without variation on the dependent variable
dataset_var_novar <- read.csv(file.choose(), header = TRUE, sep = ";")

#select female immigrants (from the dataset loaded above, not from dataset_main)
dataset_var_novar_female <- subset(dataset_var_novar, GENDER == 2)

#sort by ID and YEAR so that the lag and the first case per individual follow calendar order
dataset_var_novar_female <- dataset_var_novar_female[with(dataset_var_novar_female, order(ID, YEAR)), ]

#1-period lag employment variable, computed within individuals
#(each person's first observation gets NA instead of another person's last value)
dataset_var_novar_female$LAG_EMPLOYMENT <- ave(dataset_var_novar_female$EMPLOYMENT, 
                                               dataset_var_novar_female$ID, 
                                               FUN = function(x) c(NA, x[-length(x)]))

#remove first case per individual (it has no lagged employment status)
#duplicated() is FALSE for the first row of every ID in the sorted data, TRUE for all later rows
dataset_var_novar_female <- dataset_var_novar_female[duplicated(dataset_var_novar_female$ID), ]

#create variable 'ever naturalized'
#(NATURALIZATION code 1 = no naturalization; every higher code belongs to an immigrant who naturalizes)
dataset_var_novar_female$NATIOEVER <- NA
dataset_var_novar_female$NATIOEVER[dataset_var_novar_female$NATURALIZATION == 1] <- 0
dataset_var_novar_female$NATIOEVER[dataset_var_novar_female$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
#(codes 2-5 are the years prior to naturalization, so only codes above 5 count as naturalized)
dataset_var_novar_female$NATURALIZED <- NA
dataset_var_novar_female$NATURALIZED[dataset_var_novar_female$NATURALIZATION > 5] <- 1
dataset_var_novar_female$NATURALIZED[dataset_var_novar_female$NATURALIZATION < 6] <- 0

#create variable 'ever naturalized' * (mean-centered) years since migration
#centering = subtracting the sample mean; storing the mean itself would make the
#interaction a rescaled copy of NATIOEVER
dataset_var_novar_female$RESIDENCECENTER <- dataset_var_novar_female$RESIDENCE - 
  mean(dataset_var_novar_female$RESIDENCE, na.rm = TRUE)
dataset_var_novar_female$RESIDENCENATIO <- dataset_var_novar_female$NATIOEVER * dataset_var_novar_female$RESIDENCECENTER

#create variable 'naturalized during given observation' * YSN
#NOTE(review): YSN is presumably 'years since naturalization' - confirm against the codebook
dataset_var_novar_female$NATU_YSN <- dataset_var_novar_female$NATURALIZED * dataset_var_novar_female$YSN

#table A5 regression: female immigrants; logistic
#(pooled logit with the 1-period lagged employment status as additional predictor)
table_A5_female <- glm(EMPLOYMENT ~ NATURALIZATIONSPEED + AGEARRIVAL + RESIDENCENATIO + RESIDENCE + 
                       PARTNER + CHILD + EU + LAG_EMPLOYMENT, 
                     family = binomial(link = "logit"), data = dataset_var_novar_female)


############     
# Table A6 #
############

#load dataset "Male_1996-2002" (the 1996-1997 cohorts are selected below)
dataset_1996_2002 <- read.csv(file.choose(), header = TRUE, sep = ";")

#select male immigrants
dataset_1996_2002_male <- subset(dataset_1996_2002, GENDER == 1)

#select cohorts < 1998 
dataset_1996_1997_male <- subset(dataset_1996_2002_male, IMMIGRATIONYEAR < 1998)

#create variable 'ever naturalized'
#(NATURALIZATION code 1 = no naturalization; every higher code belongs to an immigrant who naturalizes)
dataset_1996_1997_male$NATIOEVER <- NA
dataset_1996_1997_male$NATIOEVER[dataset_1996_1997_male$NATURALIZATION == 1] <- 0
dataset_1996_1997_male$NATIOEVER[dataset_1996_1997_male$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
#(codes 2-5 are the years prior to naturalization, so only codes above 5 count as naturalized)
dataset_1996_1997_male$NATURALIZED <- NA
dataset_1996_1997_male$NATURALIZED[dataset_1996_1997_male$NATURALIZATION > 5] <- 1
dataset_1996_1997_male$NATURALIZED[dataset_1996_1997_male$NATURALIZATION < 6] <- 0

#create variable 'ever naturalized' * (mean-centered) years since migration
#centering = subtracting the sample mean; storing the mean itself would make the
#interaction a rescaled copy of NATIOEVER
dataset_1996_1997_male$RESIDENCECENTER <- dataset_1996_1997_male$RESIDENCE - 
  mean(dataset_1996_1997_male$RESIDENCE, na.rm = TRUE)
dataset_1996_1997_male$RESIDENCENATIO <- dataset_1996_1997_male$NATIOEVER * dataset_1996_1997_male$RESIDENCECENTER

#create variable 'naturalized during given observation' * YSN
#NOTE(review): YSN is presumably 'years since naturalization' - confirm against the codebook
dataset_1996_1997_male$NATU_YSN <- dataset_1996_1997_male$NATURALIZED * dataset_1996_1997_male$YSN

#table A6 regression: male immigrants; cohorts 1996-1997
#(logit with a random intercept per individual)
table_A6_male_1996_1997 <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE + 
                                 as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
                                 data = dataset_1996_1997_male, family = binomial("logit"), 
                                 control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))


#load dataset "Male_1996-2002" (the 2001-2002 cohorts are selected below)
dataset_1996_2002 <- read.csv(file.choose(), header = TRUE, sep = ";")

#select male immigrants
dataset_1996_2002_male <- subset(dataset_1996_2002, GENDER == 1)

#select cohorts > 2000 
dataset_2001_2002_male <- subset(dataset_1996_2002_male, IMMIGRATIONYEAR > 2000)

#create variable 'ever naturalized'
#(NATURALIZATION code 1 = no naturalization; every higher code belongs to an immigrant who naturalizes)
dataset_2001_2002_male$NATIOEVER <- NA
dataset_2001_2002_male$NATIOEVER[dataset_2001_2002_male$NATURALIZATION == 1] <- 0
dataset_2001_2002_male$NATIOEVER[dataset_2001_2002_male$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
#(codes 2-5 are the years prior to naturalization, so only codes above 5 count as naturalized)
dataset_2001_2002_male$NATURALIZED <- NA
dataset_2001_2002_male$NATURALIZED[dataset_2001_2002_male$NATURALIZATION > 5] <- 1
dataset_2001_2002_male$NATURALIZED[dataset_2001_2002_male$NATURALIZATION < 6] <- 0

#create variable 'ever naturalized' * (mean-centered) years since migration
#centering = subtracting the sample mean; storing the mean itself would make the
#interaction a rescaled copy of NATIOEVER
dataset_2001_2002_male$RESIDENCECENTER <- dataset_2001_2002_male$RESIDENCE - 
  mean(dataset_2001_2002_male$RESIDENCE, na.rm = TRUE)
dataset_2001_2002_male$RESIDENCENATIO <- dataset_2001_2002_male$NATIOEVER * dataset_2001_2002_male$RESIDENCECENTER

#create variable 'naturalized during given observation' * YSN
#NOTE(review): YSN is presumably 'years since naturalization' - confirm against the codebook
dataset_2001_2002_male$NATU_YSN <- dataset_2001_2002_male$NATURALIZED * dataset_2001_2002_male$YSN

#table A6 regression: male immigrants; cohorts 2001-2002
#(logit with a random intercept per individual)
table_A6_male_2001_2002 <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE + 
                                 as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
                                 data = dataset_2001_2002_male, family = binomial("logit"), 
                                 control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

#load dataset "Female_1996-2002" (the 1996-1997 cohorts are selected below)
dataset_1996_2002 <- read.csv(file.choose(), header = TRUE, sep = ";")

#select female immigrants
dataset_1996_2002_female <- subset(dataset_1996_2002, GENDER == 2)

#select cohorts < 1998 
dataset_1996_1997_female <- subset(dataset_1996_2002_female, IMMIGRATIONYEAR < 1998)

#create variable 'ever naturalized'
#(NATURALIZATION code 1 = no naturalization; every higher code belongs to an immigrant who naturalizes)
dataset_1996_1997_female$NATIOEVER <- NA
dataset_1996_1997_female$NATIOEVER[dataset_1996_1997_female$NATURALIZATION == 1] <- 0
dataset_1996_1997_female$NATIOEVER[dataset_1996_1997_female$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
#(codes 2-5 are the years prior to naturalization, so only codes above 5 count as naturalized)
dataset_1996_1997_female$NATURALIZED <- NA
dataset_1996_1997_female$NATURALIZED[dataset_1996_1997_female$NATURALIZATION > 5] <- 1
dataset_1996_1997_female$NATURALIZED[dataset_1996_1997_female$NATURALIZATION < 6] <- 0

#create variable 'ever naturalized' * (mean-centered) years since migration
#centering = subtracting the sample mean; storing the mean itself would make the
#interaction a rescaled copy of NATIOEVER
dataset_1996_1997_female$RESIDENCECENTER <- dataset_1996_1997_female$RESIDENCE - 
  mean(dataset_1996_1997_female$RESIDENCE, na.rm = TRUE)
dataset_1996_1997_female$RESIDENCENATIO <- dataset_1996_1997_female$NATIOEVER * dataset_1996_1997_female$RESIDENCECENTER

#create variable 'naturalized during given observation' * YSN
#NOTE(review): YSN is presumably 'years since naturalization' - confirm against the codebook
dataset_1996_1997_female$NATU_YSN <- dataset_1996_1997_female$NATURALIZED * dataset_1996_1997_female$YSN

#table A6 regression: female immigrants; cohorts 1996-1997
#(logit with a random intercept per individual)
table_A6_female_1996_1997 <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE + 
                                   as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
                                   data = dataset_1996_1997_female, family = binomial("logit"), 
                                   control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

#load dataset "Female_1996-2002" (the 2001-2002 cohorts are selected below)
dataset_1996_2002 <- read.csv(file.choose(), header = TRUE, sep = ";")

#select female immigrants
dataset_1996_2002_female <- subset(dataset_1996_2002, GENDER == 2)

#select cohorts > 2000 
dataset_2001_2002_female <- subset(dataset_1996_2002_female, IMMIGRATIONYEAR > 2000)

#create variable 'ever naturalized'
#(NATURALIZATION code 1 = no naturalization; every higher code belongs to an immigrant who naturalizes)
dataset_2001_2002_female$NATIOEVER <- NA
dataset_2001_2002_female$NATIOEVER[dataset_2001_2002_female$NATURALIZATION == 1] <- 0
dataset_2001_2002_female$NATIOEVER[dataset_2001_2002_female$NATURALIZATION > 1] <- 1

#create variable 'naturalized during given observation'
#(codes 2-5 are the years prior to naturalization, so only codes above 5 count as naturalized)
dataset_2001_2002_female$NATURALIZED <- NA
dataset_2001_2002_female$NATURALIZED[dataset_2001_2002_female$NATURALIZATION > 5] <- 1
dataset_2001_2002_female$NATURALIZED[dataset_2001_2002_female$NATURALIZATION < 6] <- 0

#create variable 'ever naturalized' * (mean-centered) years since migration
#centering = subtracting the sample mean; storing the mean itself would make the
#interaction a rescaled copy of NATIOEVER
dataset_2001_2002_female$RESIDENCECENTER <- dataset_2001_2002_female$RESIDENCE - 
  mean(dataset_2001_2002_female$RESIDENCE, na.rm = TRUE)
dataset_2001_2002_female$RESIDENCENATIO <- dataset_2001_2002_female$NATIOEVER * dataset_2001_2002_female$RESIDENCECENTER

#create variable 'naturalized during given observation' * YSN
#NOTE(review): YSN is presumably 'years since naturalization' - confirm against the codebook
dataset_2001_2002_female$NATU_YSN <- dataset_2001_2002_female$NATURALIZED * dataset_2001_2002_female$YSN

#table A6 regression: female immigrants; cohorts 2001-2002
#(logit with a random intercept per individual)
table_A6_female_2001_2002 <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCENATIO + NATU_YSN + RESIDENCE + 
                                   as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
                                   data = dataset_2001_2002_female, family = binomial("logit"), 
                                   control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))


############     
# Table A7 #
############

#load dataset "Data_cit_empl_var-novar", which includes individuals without variation on the dependent variable
dataset_var_novar <- read.csv(file.choose(), header = TRUE, sep = ";")

#select male immigrants (from the dataset loaded above, not from dataset_main)
dataset_var_novar_male <- subset(dataset_var_novar, GENDER == 1)

#sort by ID and YEAR so that the lag and the first case per individual follow calendar order
dataset_var_novar_male <- dataset_var_novar_male[with(dataset_var_novar_male, order(ID, YEAR)), ]

#1-period lag employment variable, computed within individuals
#(each person's first observation gets NA instead of another person's last value)
dataset_var_novar_male$LAG_EMPLOYMENT <- ave(dataset_var_novar_male$EMPLOYMENT, 
                                             dataset_var_novar_male$ID, 
                                             FUN = function(x) c(NA, x[-length(x)]))

#remove first case per individual (it has no lagged employment status)
#duplicated() is FALSE for the first row of every ID in the sorted data, TRUE for all later rows
dataset_var_novar_male <- dataset_var_novar_male[duplicated(dataset_var_novar_male$ID), ]

#select immigrants who naturalize during the observation period
#(NATURALIZATION code 1 = no naturalization)
dataset_var_novar_male_nat <- subset(dataset_var_novar_male, NATURALIZATION > 1)

#table A7 regression: male immigrants who naturalize during the observation period; logistic
#NATURALIZATION is a categorical code, so it enters as a factor rather than as a numeric score
table_A7_male_nat <- glm(EMPLOYMENT ~ as.factor(NATURALIZATION) + AGEARRIVAL + RESIDENCE + 
                         PARTNER + CHILD + EU + LAG_EMPLOYMENT, 
                         family = binomial(link = "logit"), data = dataset_var_novar_male_nat)

#load dataset "Data_cit_empl_var-novar", which includes individuals without variation on the dependent variable
dataset_var_novar <- read.csv(file.choose(), header = TRUE, sep = ";")

#select female immigrants (from the dataset loaded above, not from dataset_main)
dataset_var_novar_female <- subset(dataset_var_novar, GENDER == 2)

#sort by ID and YEAR so that the lag and the first case per individual follow calendar order
dataset_var_novar_female <- dataset_var_novar_female[with(dataset_var_novar_female, order(ID, YEAR)), ]

#1-period lag employment variable, computed within individuals
#(each person's first observation gets NA instead of another person's last value)
dataset_var_novar_female$LAG_EMPLOYMENT <- ave(dataset_var_novar_female$EMPLOYMENT, 
                                               dataset_var_novar_female$ID, 
                                               FUN = function(x) c(NA, x[-length(x)]))

#remove first case per individual (it has no lagged employment status)
#duplicated() is FALSE for the first row of every ID in the sorted data, TRUE for all later rows
dataset_var_novar_female <- dataset_var_novar_female[duplicated(dataset_var_novar_female$ID), ]

#select immigrants who naturalize during the observation period
#(NATURALIZATION code 1 = no naturalization)
dataset_var_novar_female_nat <- subset(dataset_var_novar_female, NATURALIZATION > 1)

#table A7 regression: female immigrants who naturalize during the observation period; logistic
#NATURALIZATION is a categorical code, so it enters as a factor rather than as a numeric score
table_A7_female_nat <- glm(EMPLOYMENT ~ as.factor(NATURALIZATION) + AGEARRIVAL + RESIDENCE + 
                           PARTNER + CHILD + EU + LAG_EMPLOYMENT, 
                           family = binomial(link = "logit"), data = dataset_var_novar_female_nat)


############     
# Table A8 #
############   

#male immigrants: keep rows of the main dataset with GENDER equal to 1
#(which() drops rows where the condition is NA, exactly as subset() does)
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]

#immigrants who naturalize during the observation period (NATURALIZATION code above 1)
dataset_male_nat <- dataset_male[which(dataset_male$NATURALIZATION > 1), , drop = FALSE]

#immigrants without right-truncation (TRUNCATION equal to 0)
dataset_male_nat_trunc <- dataset_male_nat[which(dataset_male_nat$TRUNCATION == 0), , drop = FALSE]

#table A8 regression: male immigrants who naturalize, excluding right-truncated cases
#(logit with a random intercept per individual)
table_A8_male_nat_trunc <- glmer(
  formula = EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + 
    as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
  data = dataset_male_nat_trunc, 
  family = binomial(link = "logit"), 
  control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb"))
)

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#select immigrants who naturalize during the observation period
#(subset of dataset_female; the misspelled name 'datasetfe_male' referred to no existing object)
dataset_female_nat <- subset(dataset_female, NATURALIZATION > 1)

#select immigrants without right-truncation
dataset_female_nat_trunc <- subset(dataset_female_nat, TRUNCATION == 0)  

#table A8 regression: female immigrants who naturalize, excluding right-truncated cases
#(logit with a random intercept per individual)
table_A8_female_nat_trunc <- glmer(EMPLOYMENT ~ as.factor(NATURALIZATION) + RESIDENCE + 
                                   as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
                                   data = dataset_female_nat_trunc, family = binomial("logit"), 
                                   control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))


############     
# Table A9 #
############  

#select male immigrants
dataset_male <- subset(dataset_main, GENDER == 1)

#create variable 'geographical distance' (dummy: 0 = below the median, 1 = at or above the median)
#the dummy must be derived from the raw distance column; comparing DUMDISTANCE with itself
#right after setting it to NA leaves every value NA
#NOTE(review): "DISTANCE" is an assumed name for the raw distance column - confirm against the dataset
stopifnot("DISTANCE" %in% names(dataset_male))
dataset_male$DUMDISTANCE <- NA
dataset_male$DUMDISTANCE[dataset_male$DISTANCE < median(dataset_male$DISTANCE, na.rm = TRUE)] <- 0
dataset_male$DUMDISTANCE[dataset_male$DISTANCE >= median(dataset_male$DISTANCE, na.rm = TRUE)] <- 1

#create variable 'naturalized during given observation'
#(codes 2-5 are the years prior to naturalization, so only codes above 5 count as naturalized)
dataset_male$NATURALIZED <- NA
dataset_male$NATURALIZED[dataset_male$NATURALIZATION > 5] <- 1
dataset_male$NATURALIZED[dataset_male$NATURALIZATION < 6] <- 0

#create variable 'geographical distance' * (mean-centered) years since migration
#centering = subtracting the sample mean; storing the mean itself would make the
#interaction a rescaled copy of DUMDISTANCE
dataset_male$RESIDENCECENTER <- dataset_male$RESIDENCE - mean(dataset_male$RESIDENCE, na.rm = TRUE)
dataset_male$RESIDENCEDISTANCE <- dataset_male$DUMDISTANCE * dataset_male$RESIDENCECENTER

#create variable 'naturalized during given observation' * YSN
#NOTE(review): YSN is presumably 'years since naturalization' - confirm against the codebook
dataset_male$NATU_YSN <- dataset_male$NATURALIZED * dataset_male$YSN

#table A9 regression: male immigrants
#(logit with a random intercept per individual)
table_A9_male <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCEDISTANCE + NATU_YSN + RESIDENCE + 
                        as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
                        data = dataset_male, family = binomial("logit"), 
                        control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))

#select female immigrants
dataset_female <- subset(dataset_main, GENDER == 2)

#create variable 'geographical distance' (dummy: 0 = below the median, 1 = at or above the median)
#the dummy must be derived from the raw distance column; comparing DUMDISTANCE with itself
#right after setting it to NA leaves every value NA
#NOTE(review): "DISTANCE" is an assumed name for the raw distance column - confirm against the dataset
stopifnot("DISTANCE" %in% names(dataset_female))
dataset_female$DUMDISTANCE <- NA
dataset_female$DUMDISTANCE[dataset_female$DISTANCE < median(dataset_female$DISTANCE, na.rm = TRUE)] <- 0
dataset_female$DUMDISTANCE[dataset_female$DISTANCE >= median(dataset_female$DISTANCE, na.rm = TRUE)] <- 1

#create variable 'naturalized during given observation'
#(codes 2-5 are the years prior to naturalization, so only codes above 5 count as naturalized)
dataset_female$NATURALIZED <- NA
dataset_female$NATURALIZED[dataset_female$NATURALIZATION > 5] <- 1
dataset_female$NATURALIZED[dataset_female$NATURALIZATION < 6] <- 0

#create variable 'geographical distance' * (mean-centered) years since migration
#centering = subtracting the sample mean; storing the mean itself would make the
#interaction a rescaled copy of DUMDISTANCE
dataset_female$RESIDENCECENTER <- dataset_female$RESIDENCE - mean(dataset_female$RESIDENCE, na.rm = TRUE)
dataset_female$RESIDENCEDISTANCE <- dataset_female$DUMDISTANCE * dataset_female$RESIDENCECENTER

#create variable 'naturalized during given observation' * YSN
#NOTE(review): YSN is presumably 'years since naturalization' - confirm against the codebook
dataset_female$NATU_YSN <- dataset_female$NATURALIZED * dataset_female$YSN

#table A9 regression: female immigrants
#(logit with a random intercept per individual)
table_A9_female <- glmer(EMPLOYMENT ~ as.factor(NATURALIZED) + RESIDENCEDISTANCE + NATU_YSN + RESIDENCE + 
                         as.factor(PARTNER) + as.factor(CHILD) + (1 | as.factor(ID)), 
                         data = dataset_female, family = binomial("logit"), 
                         control = glmerControl(optimizer = "optimx", optCtrl = list(method = "nlminb")))
